import requests
from lxml import etree


def get_html(url, timeout=10):
    """Download *url* and return the response body as text.

    A desktop browser User-Agent header is sent with the request.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout a stalled connection would block the script forever.

    Returns:
        The decoded response text when the status code is 200, otherwise
        ``False``.

    Raises:
        requests.exceptions.RequestException: Network errors and timeouts
            are not caught here and propagate to the caller.
    """
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36 Edg/91.0.864.67"
    }
    response = requests.get(url=url, headers=headers, timeout=timeout)
    if response.status_code == 200:
        return response.text
    return False


def get_arr(text):
    """Parse one list page and return its movie entry elements.

    Args:
        text: HTML source of a list page. A falsy value (``False`` as
            returned by ``get_html`` for a non-200 response, ``None`` or an
            empty string) is treated as "no page".

    Returns:
        A list with one element per ``div.item`` found under
        ``ol.grid_view``; an empty list when there is nothing to parse.
    """
    if not text:
        # Nothing was downloaded; handing this to the parser would raise.
        return []
    root = etree.HTML(text, etree.HTMLParser())
    if root is None:
        # Defensive: input without any markup may yield no root element.
        return []
    return root.xpath('//ol[@class="grid_view"]/li/div[@class="item"]')


def get_all_arr():
    """Collect the movie entry elements from every page of the Top 250 list.

    The list is requested 25 entries at a time (``start`` = 0, 25, ..., 225).
    The current offset is printed before each request as a progress marker.

    Returns:
        A flat list of the entry elements from all pages that were fetched
        successfully, in page order.
    """
    arr_list = []
    for number in range(0, 250, 25):
        print(number)
        url = "https://movie.douban.com/top250?start={}&filter=".format(number)
        text = get_html(url)
        if not text:
            # get_html signals a non-200 response with False; skip that page
            # instead of passing the flag to the parser and crashing.
            print("skipped start={}: page could not be fetched".format(number))
            continue
        arr_list += get_arr(text)
    return arr_list


def _first_text(nodes, default=""):
    """Return the first XPath result in *nodes*, or *default* if it is empty."""
    return nodes[0] if nodes else default


def _format_item(item):
    """Render one movie entry element as its multi-line text record."""
    # Shared XPath prefixes for the three areas of an entry.
    hd = './/div[@class="info"]/div[@class="hd"]/a'
    bd = './/div[@class="info"]/div[@class="bd"]'
    pic = './/div[@class="pic"]/a'

    titles = item.xpath(hd + '/span[@class="title"]/text()')
    alias = _first_text(item.xpath(hd + '/span[@class="other"]/text()'))
    image = _first_text(item.xpath(pic + '/img/@src'))
    link = _first_text(item.xpath(pic + '/@href'))
    info = _first_text(item.xpath(bd + '/p/text()')).strip()
    rating = _first_text(
        item.xpath(bd + '/div[@class="star"]/span[@class="rating_num"]/text()')
    )
    # The last <span> inside the star block holds the vote count text.
    votes = _first_text(item.xpath(bd + '/div[@class="star"]/span[last()]/text()'))
    quotes = item.xpath(bd + '/p/span/text()')

    lines = [
        "-----",
        "[名字]: " + "".join(titles),
        "[别名]: {}".format(alias),
        "[图片]: {}".format(image),
        "[链接]: {}".format(link),
        "[信息]: {}".format(info),
        "[评分]: {}".format(rating),
        "[评价人数]: {}".format(votes),
        # An entry may have no quote; the label is written regardless.
        "[点评]:" + "".join(quote.strip() for quote in quotes),
    ]
    return "\n".join(lines) + "\n"


def set_txt_file():
    """Scrape the whole list and write one text record per movie to 002.txt.

    Each record starts with a ``-----`` separator line followed by labelled
    lines for the titles, alias, poster URL, detail link, info line, rating,
    vote count and quote. A field that is absent from the page is written as
    an empty value instead of aborting the export.

    The text is assembled completely before the file is opened, so a failure
    while scraping or formatting does not leave a truncated file behind.
    """
    text = "".join(_format_item(item) for item in get_all_arr())
    with open('002.txt', 'w', encoding='utf-8') as file:
        file.write(text)


# Script entry: runs the full scrape and writes 002.txt as soon as this
# module is executed. NOTE: there is no __main__ guard, so importing the
# module triggers the same work.
set_txt_file()
# Disabled manual debugging calls kept for reference.
# get_arr(get_html())
# get_all_arr()

